#Gendered Interruption Behavior on the U.S. Courts of Appeals
#Replication Code
#Summer 2025
#-------------------------------------------------------------------------------
#INITIAL MATTER                                                                 
#-------------------------------------------------------------------------------
####                                 PACKAGES                               ####
#all necessary packages
#removes every (non-hidden) object from the global workspace before anything is loaded
#NOTE(review): `judge.data` and `oa.files2` are used further down but are not created
#anywhere in the visible part of this script -- confirm where they are loaded, because
#anything placed in the workspace before this line is deleted here
rm(list = ls())

library(fixest)
library(dplyr)
library(readxl)
library(mintyr)
library(pdftools)
library(stringr)
library(tidyverse)
library(stargazer)
library(ggpattern)
library(ggplot2)
library(ggthemes)
library(cowplot)
library(extrafont)
library(dotwhisker)
library(gapminder)
library(survival)
library(survminer)

#### loading test cases ####
#directory that holds the replication CSV; leave as "" to read from the current working
#directory (calling setwd("") stops the script with "cannot change working directory")
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}
#main replication data set; every table built below is derived from `df`
df <- read.csv("Gendered Interruption Behavior--Replication File.csv")
 
####                                 CLEANING                               ####
#case-level aggregates: mean chunk count and summed interruption-type indicators per case
#(aggregate() returns the columns Group.1 and x, relabelled here in that order)
#NOTE: `aggs` is rebuilt from the speaker-level aggregates further down
agg2 <- setNames(
  aggregate(df$total_chunks, by = list(df$case_name), FUN = mean),
  c("case_name", "chunks")
)
agg4a <- setNames(
  aggregate(df$type_positive, by = list(df$case_name), FUN = sum),
  c("case_name", "pos")
)
agg4b <- setNames(
  aggregate(df$type_negative, by = list(df$case_name), FUN = sum),
  c("case_name", "neg")
)
agg4c <- setNames(
  aggregate(df$type_neutral, by = list(df$case_name), FUN = sum),
  c("case_name", "neut")
)
agg4d <- setNames(
  aggregate(df$type_none, by = list(df$case_name), FUN = sum),
  c("case_name", "none")
)

#full outer joins on case_name, applied left to right
aggs <- Reduce(
  function(left, right) merge(left, right, by = "case_name", all = TRUE),
  list(agg2, agg4a, agg4b, agg4c, agg4d)
)

#interruptions = positive + negative; each ratio divides by the case's chunk count
aggs$ints <- with(aggs, pos + neg)
aggs$ratio_ints <- with(aggs, ints / chunks)
aggs$ratio_pos <- with(aggs, pos / chunks)
aggs$ratio_neg <- with(aggs, neg / chunks)
aggs$ratio_neut <- with(aggs, neut / chunks)
aggs$ratio_none <- with(aggs, none / chunks)

#speaker-in-case (`spec`) aggregates: mean case chunk count, number of rows per spec,
#and summed interruption-type indicators
agg1 <- setNames(
  aggregate(df$total_chunks, by = list(df$spec), FUN = mean),
  c("spec", "chunks")
)
agg1a <- setNames(
  aggregate(df$spec, by = list(df$spec), FUN = length),
  c("spec", "spec_chunks")
)
agg3a <- setNames(
  aggregate(df$type_positive, by = list(df$spec), FUN = sum),
  c("spec", "pos")
)
agg3b <- setNames(
  aggregate(df$type_negative, by = list(df$spec), FUN = sum),
  c("spec", "neg")
)
agg3c <- setNames(
  aggregate(df$type_neutral, by = list(df$spec), FUN = sum),
  c("spec", "neut")
)
agg3d <- setNames(
  aggregate(df$type_none, by = list(df$spec), FUN = sum),
  c("spec", "none")
)

#full outer joins on spec, applied left to right (this replaces any earlier `aggs`)
aggs <- Reduce(
  function(left, right) merge(left, right, by = "spec", all = TRUE),
  list(agg1, agg1a, agg3a, agg3b, agg3c, agg3d)
)

#interruptions = positive + negative; each ratio divides by the spec's own row count
aggs$spec_ints <- with(aggs, pos + neg)
aggs$spec_ratio_ints <- with(aggs, spec_ints / spec_chunks)
aggs$spec_ratio_pos <- with(aggs, pos / spec_chunks)
aggs$spec_ratio_neg <- with(aggs, neg / spec_chunks)
aggs$spec_ratio_neut <- with(aggs, neut / spec_chunks)
aggs$spec_ratio_none <- with(aggs, none / spec_chunks)

#attach the spec-level columns to every row of df, then the spec's share of the case's chunks
df <- merge(df, aggs, by = "spec", all.x = TRUE)
df$chunk_rate <- with(df, spec_chunks / total_chunks)

#speaker level data: one aggregate per variable, keyed by `spec`
#(the formula interface drops rows where the aggregated variable is NA)
ee1 <- aggregate(type_positive ~ spec, data = df, FUN = sum)
ee2 <- aggregate(type_negative ~ spec, data = df, FUN = sum)
ee3 <- aggregate(type_neutral ~ spec, data = df, FUN = sum)
ee4 <- aggregate(apology ~ spec, data = df, FUN = sum)
ee5 <- aggregate(permission ~ spec, data = df, FUN = sum)
ee6 <- aggregate(judge ~ spec, data = df, FUN = mean)
ee7 <- aggregate(total_chunks ~ spec, data = df, FUN = mean)
ee8 <- aggregate(gender ~ spec, data = df, FUN = mean)
ee9 <- aggregate(num_women ~ spec, data = df, FUN = mean)
ee10 <- aggregate(spec_chunks ~ spec, data = df, FUN = mean)
names(ee10)[names(ee10) == "spec_chunks"] <- "speaker_chunks"
ee11 <- aggregate(tone_backchannel ~ spec, data = df, FUN = mean)

#the three interruption-type tables are inner-joined; everything after that is
#left-joined so specs missing from a later table are kept with NA
spkr_lvl <- merge(merge(ee1, ee2, by = "spec"), ee3, by = "spec")
spkr_lvl <- Reduce(
  function(left, right) merge(left, right, by = "spec", all.x = TRUE),
  list(ee4, ee5, ee6, ee7, ee8, ee9, ee10, ee11),
  spkr_lvl
)

#hand-entered rows for 20 judge-case ids with zero speaker chunks and zero interruptions,
#appended to the speaker-level table; scalar values are recycled across all rows
silent <- data.frame(
  spec = c(
    "20-40754--KING (CAROLYN D.)",
    "21-10620--ENGELHARDT (KURT D.)",
    "21-1409--BRENNAN (MICHAEL B.)",
    "21-1494--CABRANES (JOSE)",
    "22-1348--WALKER (JOHN M.)",
    "22-2516--KIRSCH (THOMAS L.)",
    "22-2573--FLAUM (JOEL)",
    "22-2708--KEARSE (AMALYA)",
    "22-3008--RIPPLE (KENNETH F.)",
    "22-7305--TRAXLER (WILLIAM B.)",
    "22-8521--WALKER (JUSTIN R.)",
    "23-1060--MEIER (JOSEPH M.)",
    "23-3247--DAVIS (STEPHANIE D.)",
    "22-1070--SEYMOUR (STEPHANIE K.)",
    "22-2056--EID (ALLISON H.)",
    "22-11128--LAGOA (BARBARA)",
    "22-10441--DENNIS (JAMES L.)",
    "22-10511--DENNIS (JAMES L.)",
    "22-3552--COLLOTON (STEVEN)",
    "22-2442--SMITH (LAVENSKI R.)"
  ),
  type_positive = 0,
  type_negative = 0,
  type_neutral = 0,
  apology = 0,
  permission = 0,
  judge = 1,
  total_chunks = c(
    89, 78, 36, 64, 105,
    45, 58, 110, 33, 159,
    33, 89, 29, 77, 80,
    152, 72, 192, 83, 117
  ),
  gender = c(
    1, 0, 0, 0, 0,
    0, 0, 1, 0, 0,
    0, 0, 1, 1, 1,
    1, 0, 0, 0, 0
  ),
  num_women = c(
    2, 0, 1, 1, 1,
    1, 0, 3, 1, 0,
    1, 0, 2, 1, 1,
    1, 0, 0, 0, 0
  ),
  speaker_chunks = 0,
  tone_backchannel = 0
)
spkr_lvl <- rbind(spkr_lvl, silent)

#interruptions per speaker, then the two halves of the "<case>--<speaker>" id
spkr_lvl$speaker_ints <- with(spkr_lvl, type_positive + type_negative)
spkr_lvl$speaker <- str_split_i(spkr_lvl$spec, pattern = "--", i = 2)
spkr_lvl$case_no <- str_split_i(spkr_lvl$spec, pattern = "--", i = 1)

#join judge-level covariates by speaker name
#NOTE(review): `judge.data` is not created in the visible part of this script
judge.data <- rename(judge.data, speaker = author)
spkr_lvl2 <- merge(spkr_lvl, judge.data, by = "speaker", all.x = TRUE)

#share of the case's chunks spoken, and interruptions per own chunk
spkr_lvl2$ratio_speaks <- spkr_lvl2$speaker_chunks / spkr_lvl2$total_chunks
spkr_lvl2$ratio_ints <- spkr_lvl2$speaker_ints / spkr_lvl2$speaker_chunks

#shares of the speaker's interruptions (0/0 gives NaN for speakers with no interruptions)
spkr_lvl2$ratio_pos_ints <- spkr_lvl2$type_positive / spkr_lvl2$speaker_ints
spkr_lvl2$ratio_neg_ints <- spkr_lvl2$type_negative / spkr_lvl2$speaker_ints
spkr_lvl2$ratio_apology <- spkr_lvl2$apology / spkr_lvl2$speaker_ints
spkr_lvl2$ratio_permission <- spkr_lvl2$permission / spkr_lvl2$speaker_ints
spkr_lvl2$ratio_bc <- spkr_lvl2$tone_backchannel / spkr_lvl2$speaker_ints

#undefined shares and missing counts are set to zero (ratio_bc is left untouched)
spkr_lvl2$ratio_pos_ints <- replace(
  spkr_lvl2$ratio_pos_ints, is.nan(spkr_lvl2$ratio_pos_ints), 0
)
spkr_lvl2$ratio_neg_ints <- replace(
  spkr_lvl2$ratio_neg_ints, is.nan(spkr_lvl2$ratio_neg_ints), 0
)
spkr_lvl2$apology <- replace(spkr_lvl2$apology, is.na(spkr_lvl2$apology), 0)
spkr_lvl2$permission <- replace(spkr_lvl2$permission, is.na(spkr_lvl2$permission), 0)
spkr_lvl2$ratio_apology <- replace(
  spkr_lvl2$ratio_apology, is.na(spkr_lvl2$ratio_apology), 0
)
spkr_lvl2$ratio_permission <- replace(
  spkr_lvl2$ratio_permission, is.na(spkr_lvl2$ratio_permission), 0
)

#creating ideological disagreement/dist controls
#one copy of the (jid, jcs_dw_score) lookup per panel seat, with seat-specific names
sub.ideol <- judge.data[c("jid", "jcs_dw_score")]
s.ideol1 <- setNames(sub.ideol, c("jid1", "ideol1"))
s.ideol2 <- setNames(sub.ideol, c("jid2", "ideol2"))
s.ideol3 <- setNames(sub.ideol, c("jid3", "ideol3"))

#attach each seat's score to the case file
#NOTE(review): `oa.files2` is not created in the visible part of this script
df.merge2 <- merge(oa.files2, s.ideol1, by = "jid1", all.x = TRUE)
df.merge2 <- merge(df.merge2, s.ideol2, by = "jid2", all.x = TRUE)
df.merge2 <- merge(df.merge2, s.ideol3, by = "jid3", all.x = TRUE)

#row-wise standard deviation of the three scores, ignoring missing ones
df.merge2$jcs_spread <- apply(
  df.merge2[, c("ideol1", "ideol2", "ideol3")],
  1, sd, na.rm = TRUE
)

#1 when any of dist1/dist2/dist3 equals 1, otherwise 0
df.merge2$dist_presence <- ifelse(
  df.merge2$dist1 == 1 | df.merge2$dist2 == 1 | df.merge2$dist3 == 1,
  1, 0
)

#case-keyed lookup of the two controls (cv is renamed to case_no)
sub.ideol2 <- df.merge2[, c("cv", "jcs_spread", "dist_presence")]
names(sub.ideol2)[names(sub.ideol2) == "cv"] <- "case_no"

#case level: per-case sums/means from df and the ideology lookup
k4 <- aggregate(interrupts ~ case_no, data = df, FUN = sum)
k4a <- aggregate(num_women ~ case_no, data = df, FUN = mean)
k4b <- aggregate(type_positive ~ case_no, data = df, FUN = sum)
k4c <- aggregate(type_negative ~ case_no, data = df, FUN = sum)

k4d <- aggregate(total_chunks ~ case_no, data = df, FUN = mean)
k4e <- aggregate(dist_presence ~ case_no, data = sub.ideol2, FUN = mean)
k4f <- aggregate(jcs_spread ~ case_no, data = sub.ideol2, FUN = mean)

#inner joins for the df-based tables, left joins for the ideology-based ones
case_lvl <- Reduce(
  function(left, right) merge(left, right, by = "case_no"),
  list(k4, k4a, k4b, k4c, k4d)
)
case_lvl <- merge(case_lvl, k4e, by = "case_no", all.x = TRUE)
case_lvl <- merge(case_lvl, k4f, by = "case_no", all.x = TRUE)

#add the court: join against every df row, then keep the first row per case
case_sub <- df[, c("case_no", "court")]
case_lvl <- merge(case_lvl, case_sub, by = "case_no", all.x = TRUE)
case_lvl <- case_lvl[!duplicated(case_lvl$case_no), ]
#NOTE: jcs_spread and dist_presence are already present from k4e/k4f, so this merge
#adds them a second time and merge() suffixes both pairs with .x/.y
case_lvl <- merge(case_lvl, sub.ideol2, by = "case_no", all.x = TRUE)

#number of distinct non-judge specs per case
a_sub <- df[df$judge == 0, ]
auni <- a_sub[!duplicated(a_sub$spec), ]
auni2 <- setNames(as.data.frame(table(auni$case_no)), c("case_no", "total_atty"))
case_lvl <- merge(case_lvl, auni2, by = "case_no", all.x = TRUE)

#calculating number of women attorneys
#non-judge speakers, with the "Clerk" rows removed
a_spkr <- spkr_lvl2[spkr_lvl2$judge == 0, ]
a_spkr <- a_spkr[a_spkr$speaker != "Clerk", ]

#sum of gender.x per case, attached to the case, row and speaker tables
aag <- setNames(
  aggregate(a_spkr$gender.x, by = list(a_spkr$case_no), FUN = sum),
  c("case_no", "num_wom_atty")
)
case_lvl <- merge(case_lvl, aag, by = "case_no", all.x = TRUE)
case_lvl$num_wom_atty <- as.numeric(case_lvl$num_wom_atty)
df <- merge(df, aag, by = "case_no", all.x = TRUE)
df$num_wom_atty <- as.numeric(df$num_wom_atty)
spkr_lvl <- merge(spkr_lvl, aag, by = "case_no", all.x = TRUE)

#distribution of the number of distinct attorney names per case
t <- summarise(group_by(a_spkr, case_no), n_distinct(speaker))
table(t[["n_distinct(speaker)"]])

####                                 SUBSETS                                ####
#judges/attorneys
#row-level subsets: judges (with ideology controls attached) and attorneys without "Clerk"
j_sub <- df[df$judge == 1, ]
a_sub <- df[df$judge == 0, ]
a_sub <- a_sub[a_sub$speaker != "Clerk", ]
j_sub <- merge(j_sub, sub.ideol2, by = "case_no", all.x = TRUE)

#speaker-level counterparts
j_spkr <- spkr_lvl2[spkr_lvl2$judge == 1, ]
a_spkr <- spkr_lvl2[spkr_lvl2$judge == 0, ]
a_spkr <- a_spkr[a_spkr$speaker != "Clerk", ]

#number of judge rows per case
test <- as.data.frame(table(j_spkr$case_no))

#interruptions
#rows flagged as interruptions, by judges and by attorneys (NA flags are excluded)
int_sub <- j_sub[which(j_sub$interrupts == 1), ]
int_sub2 <- a_sub[which(a_sub$interrupts == 1), ]

#rows by interruption type, all speakers
pos_sub <- df[which(df$type_positive == 1), ]
neg_sub <- df[which(df$type_negative == 1), ]
neut_sub <- df[which(df$type_neutral == 1), ]
no_sub <- df[which(df$type_none == 1), ]

#number of women
#speakers with zero chunks have ratio_ints = NaN; treat that as zero
spkr_lvl2$ratio_ints <- ifelse(
  is.nan(spkr_lvl2$ratio_ints),
  0,
  spkr_lvl2$ratio_ints
)

#speaker-level rows split by the value of num_women (0 to 3)
sub0 <- spkr_lvl2[which(spkr_lvl2$num_women == 0), ]
sub1 <- spkr_lvl2[which(spkr_lvl2$num_women == 1), ]
sub2 <- spkr_lvl2[which(spkr_lvl2$num_women == 2), ]
sub3 <- spkr_lvl2[which(spkr_lvl2$num_women == 3), ]

#judges only
subj0 <- sub0[which(sub0$judge == 1), ]
subj1 <- sub1[which(sub1$judge == 1), ]
subj2 <- sub2[which(sub2$judge == 1), ]
subj3 <- sub3[which(sub3$judge == 1), ]

#pairwise stacks used for two-group comparisons across num_women values
subj01 <- rbind(subj0, subj1)
subj12 <- rbind(subj1, subj2)
subj02 <- rbind(subj0, subj2)
subj23 <- rbind(subj2, subj3)
subj13 <- rbind(subj1, subj3)

#split each stack by the judge's own gender code (m = gender.x 0, w = gender.x 1)
msubj01 <- subj01[subj01$gender.x == 0, ]
msubj12 <- subj12[subj12$gender.x == 0, ]
msubj02 <- subj02[subj02$gender.x == 0, ]
wsubj12 <- subj12[subj12$gender.x == 1, ]
wsubj23 <- subj23[subj23$gender.x == 1, ]
wsubj13 <- subj13[subj13$gender.x == 1, ]

#ideology
#split on the sign of the ideology score (rows scoring exactly 0 or NA fall in neither)
con_sub <- df[which(df$ideol > 0), ]
lib_sub <- df[which(df$ideol < 0), ]

con_spkr <- j_spkr[which(j_spkr$jcs_dw_score > 0), ]
lib_spkr <- j_spkr[which(j_spkr$jcs_dw_score < 0), ]

con_ints <- int_sub[which(int_sub$ideol > 0), ]
lib_ints <- int_sub[which(int_sub$ideol < 0), ]

#unique judges
#first row per judge-case id (quick1) and per judge name (quick2)
quick1 <- j_sub[!duplicated(j_sub$spec), ]
quick2 <- j_sub[!duplicated(j_sub$speaker), ]

#circuit level
#%in% never yields NA, so rows with a missing court are left out of every subset

#all rows, by circuit
c2 <- df[df$court %in% "Second Cir.", ]
c3 <- df[df$court %in% "Third Cir.", ]
c4 <- df[df$court %in% "Fourth Cir.", ]
c5 <- df[df$court %in% "Fifth Cir.", ]
c6 <- df[df$court %in% "Sixth Cir.", ]
c7 <- df[df$court %in% "Seventh Cir.", ]
c8 <- df[df$court %in% "Eighth Cir.", ]
c9 <- df[df$court %in% "Ninth Cir.", ]
c10 <- df[df$court %in% "Tenth Cir.", ]
c11 <- df[df$court %in% "Eleventh Cir.", ]
cdcc <- df[df$court %in% "D.C. Cir.", ]

#judicial interruptions, by circuit
i2 <- int_sub[int_sub$court %in% "Second Cir.", ]
i3 <- int_sub[int_sub$court %in% "Third Cir.", ]
i4 <- int_sub[int_sub$court %in% "Fourth Cir.", ]
i5 <- int_sub[int_sub$court %in% "Fifth Cir.", ]
i6 <- int_sub[int_sub$court %in% "Sixth Cir.", ]
i7 <- int_sub[int_sub$court %in% "Seventh Cir.", ]
i8 <- int_sub[int_sub$court %in% "Eighth Cir.", ]
i9 <- int_sub[int_sub$court %in% "Ninth Cir.", ]
i10 <- int_sub[int_sub$court %in% "Tenth Cir.", ]
i11 <- int_sub[int_sub$court %in% "Eleventh Cir.", ]
idcc <- int_sub[int_sub$court %in% "D.C. Cir.", ]

#one row per judge-case id, by circuit
j2 <- quick1[quick1$court %in% "Second Cir.", ]
j3 <- quick1[quick1$court %in% "Third Cir.", ]
j4 <- quick1[quick1$court %in% "Fourth Cir.", ]
j5 <- quick1[quick1$court %in% "Fifth Cir.", ]
j6 <- quick1[quick1$court %in% "Sixth Cir.", ]
j7 <- quick1[quick1$court %in% "Seventh Cir.", ]
j8 <- quick1[quick1$court %in% "Eighth Cir.", ]
j9 <- quick1[quick1$court %in% "Ninth Cir.", ]
j10 <- quick1[quick1$court %in% "Tenth Cir.", ]
j11 <- quick1[quick1$court %in% "Eleventh Cir.", ]
jdcc <- quick1[quick1$court %in% "D.C. Cir.", ]

#one row per judge name, by circuit
u2 <- quick2[quick2$court %in% "Second Cir.", ]
u3 <- quick2[quick2$court %in% "Third Cir.", ]
u4 <- quick2[quick2$court %in% "Fourth Cir.", ]
u5 <- quick2[quick2$court %in% "Fifth Cir.", ]
u6 <- quick2[quick2$court %in% "Sixth Cir.", ]
u7 <- quick2[quick2$court %in% "Seventh Cir.", ]
u8 <- quick2[quick2$court %in% "Eighth Cir.", ]
u9 <- quick2[quick2$court %in% "Ninth Cir.", ]
u10 <- quick2[quick2$court %in% "Tenth Cir.", ]
u11 <- quick2[quick2$court %in% "Eleventh Cir.", ]
udcc <- quick2[quick2$court %in% "D.C. Cir.", ]

####                             FIGURE CLEANING                            ####
#for fig.1 -- section IV-B
#per-case sums of each tone indicator over all rows of df
tone1 <- setNames(
  aggregate(df$tone_rejection, by = list(df$case_name), FUN = sum),
  c("case_name", "rejection")
)
tone2 <- setNames(
  aggregate(df$tone_objection, by = list(df$case_name), FUN = sum),
  c("case_name", "objection")
)
tone3 <- setNames(
  aggregate(df$tone_change, by = list(df$case_name), FUN = sum),
  c("case_name", "topic_change")
)
tone4 <- setNames(
  aggregate(df$tone_administrative, by = list(df$case_name), FUN = sum),
  c("case_name", "administrative")
)
tone5 <- setNames(
  aggregate(df$tone_agreement, by = list(df$case_name), FUN = sum),
  c("case_name", "agreement")
)
tone6 <- setNames(
  aggregate(df$tone_listening, by = list(df$case_name), FUN = sum),
  c("case_name", "listening")
)
tone7 <- setNames(
  aggregate(df$tone_elaboration, by = list(df$case_name), FUN = sum),
  c("case_name", "elaboration")
)
tone8 <- setNames(
  aggregate(df$tone_backchannel, by = list(df$case_name), FUN = sum),
  c("case_name", "back_channel")
)

#one wide table: a row per case, a column per tone
taggs <- Reduce(
  function(left, right) merge(left, right, by = "case_name", all = TRUE),
  list(tone1, tone2, tone3, tone4, tone5, tone6, tone7, tone8)
)

#kept under its own name because `taggs` is reused for the next figure
taggs1 <- taggs

#for fig.3 -- section V-A
#same per-case tone sums as for fig.1, computed separately for attorney rows (a_sub)
#and judge rows (j_sub), then stacked with an `interrupter` label

#attorney rows
tone1 <- setNames(
  aggregate(a_sub$tone_rejection, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "rejection")
)
tone2 <- setNames(
  aggregate(a_sub$tone_objection, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "objection")
)
tone3 <- setNames(
  aggregate(a_sub$tone_change, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "topic_change")
)
tone4 <- setNames(
  aggregate(a_sub$tone_administrative, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "administrative")
)
tone5 <- setNames(
  aggregate(a_sub$tone_agreement, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "agreement")
)
tone6 <- setNames(
  aggregate(a_sub$tone_listening, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "listening")
)
tone7 <- setNames(
  aggregate(a_sub$tone_elaboration, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "elaboration")
)
tone8 <- setNames(
  aggregate(a_sub$tone_backchannel, by = list(a_sub$case_name), FUN = sum),
  c("case_name", "back_channel")
)

taggs <- Reduce(
  function(left, right) merge(left, right, by = "case_name", all = TRUE),
  list(tone1, tone2, tone3, tone4, tone5, tone6, tone7, tone8)
)
taggs$interrupter <- "attorney"

#judge rows
tone1 <- setNames(
  aggregate(j_sub$tone_rejection, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "rejection")
)
tone2 <- setNames(
  aggregate(j_sub$tone_objection, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "objection")
)
tone3 <- setNames(
  aggregate(j_sub$tone_change, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "topic_change")
)
tone4 <- setNames(
  aggregate(j_sub$tone_administrative, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "administrative")
)
tone5 <- setNames(
  aggregate(j_sub$tone_agreement, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "agreement")
)
tone6 <- setNames(
  aggregate(j_sub$tone_listening, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "listening")
)
tone7 <- setNames(
  aggregate(j_sub$tone_elaboration, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "elaboration")
)
tone8 <- setNames(
  aggregate(j_sub$tone_backchannel, by = list(j_sub$case_name), FUN = sum),
  c("case_name", "back_channel")
)

taggs2 <- Reduce(
  function(left, right) merge(left, right, by = "case_name", all = TRUE),
  list(tone1, tone2, tone3, tone4, tone5, tone6, tone7, tone8)
)
taggs2$interrupter <- "judge"

#attorney rows followed by judge rows
taggs3 <- rbind(taggs, taggs2)

####                                  IV-B                                  ####
####                                            descriptive statistics + t-tests
#mean of the `interrupts` indicator over all rows
mean(df$interrupts, na.rm = TRUE)

#average `length` for three categories of speaker instance
tn_sub <- df[df$type == "true neutral", ]
mean(tn_sub$length, na.rm = TRUE)
bc_sub <- df[df$tone == "back channel", ]
mean(bc_sub$length, na.rm = TRUE)
#stored as `adm_sub` so the attorney subset `a_sub` built earlier is not overwritten
adm_sub <- df[df$tone == "administrative", ]
mean(adm_sub$length, na.rm = TRUE)

####                                                            tables & figures
#fig.1: mean per-case count of each interruption tone, drawn as bars
#the bar colours are fixed constants, one per tone in the releveled order (three colour
#groups); no fill aesthetic is mapped because the reshaped data has only case_name,
#name and value -- the former `fill = judge` mapping pointed at a column that is not there
taggs1 %>%
  pivot_longer(
    cols = c(rejection, objection, topic_change, administrative,
             back_channel, agreement, elaboration, listening)
  ) %>%
  mutate(
    name = fct_relevel(name, "rejection", "objection", "topic_change", "administrative",
                       "back_channel", "agreement", "elaboration", "listening")
  ) %>%
  ggplot(aes(x = name, y = value)) +
  stat_summary(
    fun.data = mean_sdl, geom = "bar",
    fill = c("#d60036", "#d60036", "#d60036", "#ffb852", "#ffb852",
             "#007e62", "#007e62", "#007e62")
  ) +
  labs(title = "Average Interruption Type", x = "interruption type", y = "avg # (per case)") +
  theme_light() +
  theme(axis.text.x = element_text(size = 10, angle = 0)) +
  theme(text = element_text(family = "Times New Roman", face = "bold", color = "black", size = 12))
####                                  IV-C                                  ####
####                                            descriptive statistics + t-tests
#judges
length(unique(j_sub$spec))    #376 judge instances (but 20 non-speaking)
table(quick1$gender)          #~31.65% women
length(unique(j_sub$speaker)) #187 unique judges
table(quick2$gender)          #~35.8% women
table(case_lvl$num_women)     #number of women judges by case

#attorneys
table(a_spkr$gender.x)        #~29.97% women
table(case_lvl$num_wom_atty)  #number of women attorneys by case
#the count column of auni2 was renamed from Freq to total_atty when it was built
table(auni2$total_atty)       #number of all attorneys by case

####                                                            tables & figures
#tab.1: circuit-level gender compositions

#mean women attorneys per case divided by mean attorneys per case, by circuit
with(case_lvl, mean(num_wom_atty[court == "Second Cir."]) /
       mean(total_atty[court == "Second Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Third Cir."]) /
       mean(total_atty[court == "Third Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Fourth Cir."]) /
       mean(total_atty[court == "Fourth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Fifth Cir."]) /
       mean(total_atty[court == "Fifth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Sixth Cir."]) /
       mean(total_atty[court == "Sixth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Seventh Cir."]) /
       mean(total_atty[court == "Seventh Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Eighth Cir."]) /
       mean(total_atty[court == "Eighth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Ninth Cir."]) /
       mean(total_atty[court == "Ninth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Tenth Cir."]) /
       mean(total_atty[court == "Tenth Cir."]))
with(case_lvl, mean(num_wom_atty[court == "Eleventh Cir."]) /
       mean(total_atty[court == "Eleventh Cir."]))
with(case_lvl, mean(num_wom_atty[court == "D.C. Cir."]) /
       mean(total_atty[court == "D.C. Cir."]))

#mean of gender_judge over judge-case instances, by circuit
with(j2, mean(gender_judge))
with(j3, mean(gender_judge))
with(j4, mean(gender_judge))
with(j5, mean(gender_judge))
with(j6, mean(gender_judge))
with(j7, mean(gender_judge))
with(j8, mean(gender_judge))
with(j9, mean(gender_judge))
with(j10, mean(gender_judge))
with(j11, mean(gender_judge))
with(jdcc, mean(gender_judge))

#mean of gender_judge over unique judge names, by circuit
with(u2, mean(gender_judge))
with(u3, mean(gender_judge))
with(u4, mean(gender_judge))
with(u5, mean(gender_judge))
with(u6, mean(gender_judge))
with(u7, mean(gender_judge))
with(u8, mean(gender_judge))
with(u9, mean(gender_judge))
with(u10, mean(gender_judge))
with(u11, mean(gender_judge))
with(udcc, mean(gender_judge))

#mean num_women per case, by circuit
with(case_lvl, mean(num_women[court == "Second Cir."]))
with(case_lvl, mean(num_women[court == "Third Cir."]))
with(case_lvl, mean(num_women[court == "Fourth Cir."]))
with(case_lvl, mean(num_women[court == "Fifth Cir."]))
with(case_lvl, mean(num_women[court == "Sixth Cir."]))
with(case_lvl, mean(num_women[court == "Seventh Cir."]))
with(case_lvl, mean(num_women[court == "Eighth Cir."]))
with(case_lvl, mean(num_women[court == "Ninth Cir."]))
with(case_lvl, mean(num_women[court == "Tenth Cir."]))
with(case_lvl, mean(num_women[court == "Eleventh Cir."]))
with(case_lvl, mean(num_women[court == "D.C. Cir."]))

####                                  IV-D                                  ####
#distribution of chunks per case and per speaker; counts of j_spkr$dist values
summary(case_lvl$total_chunks)
summary(spkr_lvl2$speaker_chunks)
table(j_spkr$dist)
####                                   V-A                                  ####
####                                            descriptive statistics + t-tests
#average speaker instances and interruptions
#first line: judges with at least one speaker instance (column name spelled out in full
#rather than relying on partial matching of `speaker_chunk`)
mean(j_spkr$speaker_chunks[j_spkr$speaker_chunks > 0], na.rm = TRUE)
mean(a_spkr$speaker_chunks, na.rm = TRUE)
mean(j_spkr$speaker_chunks, na.rm = TRUE)          #footnote about silent judges

#interruption rate by speaker role and by whether the row follows a judge
t.test(interrupts ~ judge, df)
t.test(interrupts ~ follows_judge, df)
table(int_sub$follows_judge)

#average `length` of positive and negative interruptions
mean(pos_sub$length)
mean(neg_sub$length)

#judicial interruptions by type
#each line is the mean of an indicator among judge rows flagged as interruptions
with(df, mean(type_positive[judge == 1 & interrupts == 1], na.rm = TRUE))
with(df, mean(type_negative[judge == 1 & interrupts == 1], na.rm = TRUE))

with(df, mean(tone_objection[judge == 1 & interrupts == 1], na.rm = TRUE))
with(df, mean(tone_objection[judge == 1 & interrupts == 1 & type_negative == 1],
              na.rm = TRUE))
with(df, mean(tone_elaboration[judge == 1 & interrupts == 1], na.rm = TRUE))
with(df, mean(tone_elaboration[judge == 1 & interrupts == 1 & type_positive == 1],
              na.rm = TRUE))
with(df, mean(tone_listening[judge == 1 & interrupts == 1], na.rm = TRUE))

#attorney interruptions by type
with(df, mean(type_positive[judge == 0 & interrupts == 1], na.rm = TRUE))
with(df, mean(type_negative[judge == 0 & interrupts == 1], na.rm = TRUE))

with(df, mean(tone_objection[judge == 0 & interrupts == 1], na.rm = TRUE))
with(df, mean(tone_agreement[judge == 0 & interrupts == 1], na.rm = TRUE))

#average word per speaker instance
t.test(length ~ judge, data = df)
t.test(length ~ gender_judge, data = df) #fn 151
t.test(length ~ gender_atty, data = df)  #fn 151

#length by interruption status within role, and by role within interruption type
t.test(length ~ interrupts, data = df, subset = judge == 1)
t.test(length ~ interrupts, data = df, subset = judge == 0)
t.test(length ~ judge, data = df, subset = type_positive == 1)
t.test(length ~ judge, data = df, subset = type_negative == 1)

#length by interruption status within each gender code
t.test(length ~ interrupts, data = df, subset = gender_judge == 1) #fn 152
t.test(length ~ interrupts, data = df, subset = gender_judge == 0) #fn 152
t.test(length ~ interrupts, data = df, subset = gender_atty == 1)  #fn 152
t.test(length ~ interrupts, data = df, subset = gender_atty == 0)  #fn 152

####                                               regressions & interpretations
#OLS of a speaker's number of instances on gender, case length and the number of women
#(judge-side covariates in m1, attorney-side covariates in m2)
m1 <- lm(
  formula = spec_chunks ~ gender_judge + total_chunks + num_women,
  data = df
)
m2 <- lm(
  formula = spec_chunks ~ gender_atty + total_chunks + num_wom_atty,
  data = df
)
summary(m1)
summary(m2)

####                                                            tables & figures
#fig.2
#stacked bars of hand-entered instance counts, split by who was interrupted
for.fig2 <- data.frame(
  interrupter = rep(c("judges", "attorneys"), times = 3),
  type = rep(c("noninterruption", "judge", "attorney"), each = 2),
  instances = c(10.45, 21.94, 0.10, 0.79, 8.87, 0)
)
for.fig2 %>%
  mutate(type = fct_relevel(type, "noninterruption", "attorney", "judge")) %>%
  ggplot(aes(fill = type, y = instances, x = interrupter)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_manual(values = c("lightsteelblue1", "lightsteelblue3", "black")) +
  labs(fill = "interruptee") +
  theme_light() +
  theme(text = element_text(family = "Times New Roman", face = "bold", color = "black", size = 12))

#fig.3
#dodged bars of the mean per-case count of each tone, attorneys next to judges
taggs3 %>%
  pivot_longer(
    cols = c(rejection, objection, topic_change, administrative,
             back_channel, agreement, elaboration, listening)
  ) %>%
  mutate(
    name = fct_relevel(name, "rejection", "objection", "topic_change", "administrative",
                       "back_channel", "agreement", "elaboration", "listening")
  ) %>%
  ggplot(aes(x = name, y = value, fill = interrupter)) +
  geom_bar(position = "dodge", stat = "summary", fun.data = mean_sdl) +
  scale_fill_manual(values = c("lightsteelblue3", "black")) +
  labs(title = "Average Interruption Type", x = "interruption type", y = "avg # (per case)") +
  theme_light() +
  theme(axis.text.x = element_text(size = 10, angle = 0)) +
  theme(text = element_text(family = "Times New Roman", face = "bold", color = "black", size = 12))

#fig.4
#dodged bars of hand-entered average word counts by speaker and interruption type
for.fig4 <- data.frame(
  speaker = rep(c("judge", "attorney"), times = 3),
  type = rep(c("noninterruption", "positive", "negative"), each = 2),
  length = c(31, 79, 28, 36, 46, 54)
)
for.fig4 %>%
  mutate(type = fct_relevel(type, "noninterruption", "positive", "negative")) %>%
  ggplot(aes(x = type, y = length, fill = speaker)) +
  geom_bar(position = "dodge", stat = "summary", fun.data = mean_sdl) +
  scale_fill_manual(values = c("lightsteelblue3", "black")) +
  labs(title = "Average # Words", x = "interruption type", y = "avg # words") +
  theme_light() +
  theme(text = element_text(family = "Times New Roman", face = "bold", color = "black", size = 12))

####                                   V-B                                  ####
####                                            descriptive statistics + t-tests
#propensity
#interruption rate by judge gender, overall and for rows that follow a judge
t.test(interrupts ~ gender_judge, df)
t.test(interrupts ~ gender_judge, df, subset = (follows_judge == 1)) #fn 107

#tone
#share of judicial interruptions of each type, by judge gender
t.test(type_positive ~ gender_judge, int_sub)
t.test(type_negative ~ gender_judge, int_sub)

#same comparison restricted to interruptions that follow a judge
t.test(type_positive ~ gender, int_sub, subset = (int_sub$follows_judge == 1))
t.test(type_negative ~ gender, int_sub, subset = (int_sub$follows_judge == 1))
t.test(type_neutral ~ gender, int_sub, subset = (int_sub$follows_judge == 1))

#which judges made the interruptions that follow a judge
#the trailing comma makes this a row filter (without it `[` selects columns and fails);
#which() leaves out rows where any of the three flags is NA
t2 <- df[which(df$follows_judge == 1 & df$judge == 1 & df$interrupts == 1), ]
table(t2$speaker)

t.test(type_neutral ~ gender_judge, int_sub)
t.test(type_admin ~ gender_judge, int_sub)
t.test(type_admin ~ gender_judge, int_sub, subset = (presiding == 0))
t.test(type_admin ~ presiding, int_sub)
t.test(presiding ~ type_admin, int_sub)

####                                               regressions & interpretations
#propensity
#linear probability models of `interrupts`: bivariate, with controls, and with
#case_name added as a regressor
m1a <- lm(formula = interrupts ~ gender_judge, data = df)
m1b <- lm(
  formula = interrupts ~ gender_judge + spec_chunks + follows_woman + dist + lag_length,
  data = df
)
m1b1 <- lm(
  formula = interrupts ~ gender_judge + spec_chunks + follows_woman + dist + lag_length +
    case_name,
  data = df
)
summary(m1a) #neg
summary(m1b) #neg
summary(m1b1) #neg

#tone
#interruption type among judicial interruptions: plain OLS (a-c) and the same
#specification with case_name fixed effects via feols (d-f)
m2a <- lm(formula = type_positive ~ gender_judge, data = int_sub)
m2d <- feols(type_positive ~ gender_judge | case_name, data = int_sub)
m2b <- lm(formula = type_negative ~ gender_judge, data = int_sub)
m2e <- feols(type_negative ~ gender_judge | case_name, data = int_sub)
m2c <- lm(formula = type_admin ~ gender_judge + presiding, data = int_sub)
m2f <- feols(type_admin ~ gender_judge + presiding | case_name, data = int_sub)

summary(m2a) #pos**
summary(m2d) #neg
summary(m2b) #neg***
summary(m2e) #neg
summary(m2c) #pos
summary(m2f) #pos (presid = pos***)

####                                                            tables & figures
#tab.2
#propensity models with controls, without and with case_name
stargazer(
  m1b, m1b1,
  align = TRUE,
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "gender", "speaker instances", "follows woman", "district judge",
    "lagged length"
  )
)
#tab.3
#bivariate tone models (positive, negative)
stargazer(
  m2a, m2b,
  align = TRUE,
  dep.var.labels.include = FALSE,
  covariate.labels = c("gender", "constant")
)

####                                   V-C                                  ####
####                                            descriptive statistics + t-tests
# ratio_ints in subj0..subj3; trailing comments record the group means
mean(subj0$ratio_ints)                                        #0.3856 men
t.test(ratio_ints ~ gender.x, data = subj1)                   #0.4837 men, 0.3852 women
t.test(ratio_ints ~ gender.x, data = subj2)                   #0.3693 men, 0.4195 women
mean(subj3$ratio_ints)                                        #            0.3742 women

# same layout for the positive and negative interruption ratios
mean(subj0$ratio_pos_ints)                                    #0.3819 men
t.test(ratio_pos_ints ~ gender.x, data = subj1)               #0.3887 men, 0.3305 women
t.test(ratio_pos_ints ~ gender.x, data = subj2)               #0.3788 men, 0.3915 women
mean(subj3$ratio_pos_ints)                                    #            0.3278 women
mean(subj0$ratio_neg_ints)                                    #0.4848 men
t.test(ratio_neg_ints ~ gender.x, data = subj1)               #0.5271 men, 0.5349 women
t.test(ratio_neg_ints ~ gender.x, data = subj2)               #0.5522 men, 0.4937 women
mean(subj3$ratio_neg_ints)                                    #            0.4222 women

# ratio_ints by num_women within the msubj* / wsubj* data frames
t.test(ratio_ints ~ num_women, data = msubj01)                #p<0.05
t.test(ratio_ints ~ num_women, data = msubj12)                #p<0.05
t.test(ratio_ints ~ num_women, data = wsubj12)                #not statistically significant
t.test(ratio_ints ~ num_women, data = wsubj23)                #not statistically significant

# positive-interruption ratio by num_women
t.test(ratio_pos_ints ~ num_women, data = msubj01)            #not statistically significant
t.test(ratio_pos_ints ~ num_women, data = msubj12)            #not statistically significant
t.test(ratio_pos_ints ~ num_women, data = msubj02)            #not statistically significant
t.test(ratio_pos_ints ~ num_women, data = wsubj12)            #not statistically significant
t.test(ratio_pos_ints ~ num_women, data = wsubj23)            #not statistically significant
t.test(ratio_pos_ints ~ num_women, data = wsubj13)            #not statistically significant

# negative-interruption ratio by num_women
t.test(ratio_neg_ints ~ num_women, data = msubj01)            #not statistically significant
t.test(ratio_neg_ints ~ num_women, data = msubj12)            #not statistically significant
t.test(ratio_neg_ints ~ num_women, data = msubj02)            #not statistically significant
t.test(ratio_neg_ints ~ num_women, data = wsubj12)            #not statistically significant
t.test(ratio_neg_ints ~ num_women, data = wsubj23)            #not statistically significant
t.test(ratio_neg_ints ~ num_women, data = wsubj13)            #not statistically significant

####                                               regressions & interpretations
# OLS of interruption propensity (df) and negative tone (int_sub) on num_women,
# fit separately for gender_judge == 1 (m3g1, m3h1) and gender_judge == 0
# (m3g2, m3h2).
# Rows are filtered with subset(): a bare `df[cond]` on a data.frame indexes
# columns rather than rows, and subset() also drops rows where the condition
# is NA instead of turning them into all-NA rows.
m3g1 <- lm(interrupts ~ num_women, data = subset(df, gender_judge == 1))
m3h1 <- lm(type_negative ~ num_women, data = subset(int_sub, gender_judge == 1))
m3g2 <- lm(interrupts ~ num_women, data = subset(df, gender_judge == 0))
m3h2 <- lm(type_negative ~ num_women, data = subset(int_sub, gender_judge == 0))

####                                                            tables & figures
#fig.5
# hard-coded bar heights; they match the ratio_ints means annotated in the
# V-C descriptive comments (men on panels with 0-2 women, women with 1-3)
for.fig5 <- data.frame(
  women = c("0", "1", "2", "1", "2", "3"),
  gender = rep(c("men", "women"), each = 3),
  ratio = c(0.3856, 0.4837, 0.3693, 0.3852, 0.4195, 0.3742)
)
# dodged bars; preserve = "single" keeps bar widths equal where a group is absent
ggplot(for.fig5, aes(x = women, y = ratio, fill = gender)) +
  geom_bar(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single")
  ) +
  scale_fill_manual(values = c("steelblue4", "palevioletred3")) +
  labs(
    title = "Average Interruption Rate",
    x = "# panel women",
    y = "interruption rate"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  )

#fig.6
# hard-coded positive / negative interruption ratios; they match the
# ratio_pos_ints and ratio_neg_ints means annotated in the V-C comments
for.fig6 <- data.frame(
  women = rep(c("0", "1", "2", "1", "2", "3"), times = 2),
  gender_type = rep(
    c("men-positive", "women-positive", "men-negative", "women-negative"),
    each = 3
  ),
  ratio = c(
    0.3819, 0.3887, 0.3788, 0.3305, 0.3915, 0.3278,
    0.4848, 0.5271, 0.5522, 0.5349, 0.4937, 0.4222
  )
)
# legend / dodge order is fixed by releveling gender_type; pattern and
# pattern_color are supplied as per-bar constants rather than via a scale
for.fig6 %>%
  mutate(
    gender_type = fct_relevel(
      gender_type,
      "men-negative", "women-negative", "men-positive", "women-positive"
    )
  ) %>%
  ggplot(aes(x = women, y = ratio, fill = gender_type, pattern = gender_type)) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = c(
      "none", "stripe", "none", "none", "stripe", "stripe",
      "none", "none", "stripe", "stripe", "none", "stripe"
    ),
    pattern_color = c(
      "steelblue3", "steelblue3", "palevioletred1", "palevioletred1",
      "steelblue3", "palevioletred1", "steelblue3", "palevioletred1",
      "steelblue3", "palevioletred1", "steelblue3", "palevioletred1"
    ),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = rep(c("steelblue4", "palevioletred3"), times = 2)) +
  labs(
    title = "Average Interruption Ratio",
    x = "# panel women",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = rep(c("none", "stripe"), each = 2),
        pattern_color = rep(c("steelblue3", "palevioletred1"), times = 2)
      )
    )
  )

#tab.4
# LaTeX table of the four num_women models (m3g1, m3g2, m3h1, m3h2)
stargazer(
  m3g1, m3g2, m3h1, m3h2,
  dep.var.labels.include = FALSE,
  align = TRUE
)

####                                   V-D                                  ####
####                                            descriptive statistics + t-tests
# interruption propensity (df) and tone (int_sub2) by gender_atty
t.test(interrupts ~ gender_atty, df)
t.test(type_negative ~ gender_atty, int_sub2)
t.test(type_positive ~ gender_atty, int_sub2)

# Every `subset` below names a bare column, so it is evaluated inside the data
# frame passed as `data`. The earlier form `subset = (df$col == ...)` built a
# mask of length nrow(df); applied to int_sub / int_sub2 that mask does not
# line up with the rows being tested and silently selects the wrong ones.
# NOTE(review): assumes int_sub2 carries `follows_woman` and int_sub carries
# `follows_woman_atty` (both appear to be row subsets of df) -- confirm.
t.test(interrupts ~ follows_woman, df, subset = (gender_atty == 1))
t.test(interrupts ~ follows_woman, df, subset = (gender_atty == 0))
t.test(type_positive ~ gender_atty, int_sub2, subset = (follows_woman == 0))
t.test(type_positive ~ gender_atty, int_sub2, subset = (follows_woman == 1))
t.test(type_negative ~ gender_atty, int_sub2, subset = (follows_woman == 0))
t.test(type_negative ~ gender_atty, int_sub2, subset = (follows_woman == 1))

# same comparisons from the judge side, split on follows_woman_atty
t.test(interrupts ~ follows_woman_atty, df, subset = (gender_judge == 1))
t.test(interrupts ~ follows_woman_atty, df, subset = (gender_judge == 0))
t.test(type_positive ~ gender_judge, int_sub, subset = (follows_woman_atty == 0))
t.test(type_positive ~ gender_judge, int_sub, subset = (follows_woman_atty == 1))
t.test(type_negative ~ gender_judge, int_sub, subset = (follows_woman_atty == 0))
t.test(type_negative ~ gender_judge, int_sub, subset = (follows_woman_atty == 1))

####                                               regressions & interpretations
# propensity: bivariate lm (m4z) and feols with controls and case_name fixed
# effects (m4y)
m4z <- lm(formula = interrupts ~ gender_atty, data = df)
m4y <- feols(
  fml = interrupts ~ gender_atty + spec_chunks + follows_woman + lag_length |
    case_name,
  data = df
)
# tone on int_sub2: plain lm (m4a, m4b) and case_name fixed effects (m4d, m4e)
m4a <- lm(formula = type_positive ~ gender_atty, data = int_sub2)
m4b <- lm(formula = type_negative ~ gender_atty, data = int_sub2)
m4d <- feols(fml = type_positive ~ gender_atty | case_name, data = int_sub2)
m4e <- feols(fml = type_negative ~ gender_atty | case_name, data = int_sub2)

# trailing comments record the sign / significance noted for gender_atty
summary(m4z) #neg**
summary(m4y) #neg
summary(m4a) #pos*
summary(m4b) #neg***
summary(m4d) #pos
summary(m4e) #neg*

####                                                            tables & figures
#fig.7
# hard-coded ratios: judge gender on the x axis, bars split by attorney
# gender and interruption tone
for.fig7 <- data.frame(
  judge_gender = rep(rep(c("men", "women"), each = 2), times = 2),
  attorney_gender_type = c(
    rep(c("men-positive", "women-positive"), times = 2),
    rep(c("men-negative", "women-negative"), times = 2)
  ),
  ratio = c(0.4397, 0.5938, 0.4133, 0.5714, 0.5, 0.2188, 0.4667, 0.2857)
)
# pattern / pattern_color are per-bar constants; the legend is patched to match
for.fig7 %>%
  mutate(
    attorney_gender_type = fct_relevel(
      attorney_gender_type,
      "men-negative", "women-negative", "men-positive", "women-positive"
    )
  ) %>%
  ggplot(aes(x = judge_gender, y = ratio, fill = attorney_gender_type)) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = rep(rep(c("none", "stripe"), each = 2), times = 2),
    pattern_color = rep(c("steelblue3", "palevioletred1"), times = 4),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = rep(c("steelblue4", "palevioletred3"), times = 2)) +
  labs(
    title = "Average Interruption Ratio",
    x = "judge gender",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = rep(c("none", "stripe"), each = 2),
        pattern_color = rep(c("steelblue3", "palevioletred1"), times = 2)
      )
    )
  )

#fig.8
# hard-coded ratios: attorney gender on the x axis, bars split by judge
# gender and interruption tone
for.fig8 <- data.frame(
  attorney_gender = rep(rep(c("men", "women"), each = 2), times = 2),
  judge_gender_type = c(
    rep(c("men-positive", "women-positive"), times = 2),
    rep(c("men-negative", "women-negative"), times = 2)
  ),
  ratio = c(0.4099, 0.3875, 0.3696, 0.5318, 0.5683, 0.5413, 0.6079, 0.4277)
)
# pattern / pattern_color are per-bar constants; the legend is patched to match
for.fig8 %>%
  mutate(
    judge_gender_type = fct_relevel(
      judge_gender_type,
      "men-negative", "women-negative", "men-positive", "women-positive"
    )
  ) %>%
  ggplot(aes(x = attorney_gender, y = ratio, fill = judge_gender_type)) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = rep(rep(c("none", "stripe"), each = 2), times = 2),
    pattern_color = rep(c("steelblue3", "palevioletred1"), times = 4),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = rep(c("steelblue4", "palevioletred3"), times = 2)) +
  labs(
    title = "Average Interruption Ratio",
    x = "attorney gender",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = rep(c("none", "stripe"), each = 2),
        pattern_color = rep(c("steelblue3", "palevioletred1"), times = 2)
      )
    )
  )

####                                   V-E                                  ####
####                                            descriptive statistics + t-tests
# overall mean of `apology` in int_sub, ignoring missing values
mean(int_sub$apology, na.rm = TRUE)

# apology by gender_judge / gender_atty; the third test keeps only rows with
# follows_judge == 1
t.test(apology ~ gender_judge, data = int_sub)
t.test(apology ~ gender_atty, data = int_sub2)
t.test(apology ~ gender_judge, data = int_sub, subset = follows_judge == 1)

# same comparisons within pos_sub and neg_sub
t.test(apology ~ gender_judge, data = pos_sub)
t.test(apology ~ gender_atty, data = pos_sub)
t.test(apology ~ gender_judge, data = neg_sub)
t.test(apology ~ gender_atty, data = neg_sub)

####                                               regressions & interpretations
# apology on gender_judge: plain lm (a1a) and case_name fixed effects (a1b)
a1a <- lm(formula = apology ~ gender_judge, data = int_sub)
a1b <- feols(fml = apology ~ gender_judge | case_name, data = int_sub)
summary(a1a) #pos***
summary(a1b) #pos

####                                                            tables & figures
#fig.9
# hard-coded apology rates: interrupter type on the x axis, bars split by
# gender and interruption tone
for.fig9 <- data.frame(
  interrupter = rep(rep(c("judges", "attorneys"), each = 2), times = 2),
  judge_gender_type = c(
    rep(c("men-positive", "women-positive"), times = 2),
    rep(c("men-negative", "women-negative"), times = 2)
  ),
  instances = c(0.0167, 0.0490, 0.0123, 0.0645, 0.0056, 0.0321, 0.0215, 0.0769)
)

# pattern / pattern_color are per-bar constants; the legend is patched to match
for.fig9 %>%
  mutate(
    judge_gender_type = fct_relevel(
      judge_gender_type,
      "men-negative", "women-negative", "men-positive", "women-positive"
    )
  ) %>%
  ggplot(aes(x = interrupter, y = instances, fill = judge_gender_type)) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = rep(rep(c("none", "stripe"), each = 2), times = 2),
    pattern_color = rep(c("steelblue3", "palevioletred1"), times = 4),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = rep(c("steelblue4", "palevioletred3"), times = 4)) +
  labs(title = "Average Apology Rate", x = "interrupter", y = "apology rate") +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = rep(c("none", "stripe"), each = 2),
        pattern_color = rep(c("steelblue3", "palevioletred1"), times = 2)
      )
    )
  )

####                                   V-F                                  ####
####                                            descriptive statistics + t-tests
t.test(jcs_dw_score ~ gender.x, j_spkr)

# mean speaker_chunks among speakers with at least one chunk.
# The filter spells out `speaker_chunks` in full: the earlier `$speaker_chunk`
# relied on `$` partial matching, which returns NULL (and so a NaN mean) on a
# tibble or when more than one column shares that prefix.
mean(con_spkr$speaker_chunks[con_spkr$speaker_chunks > 0])
mean(lib_spkr$speaker_chunks[lib_spkr$speaker_chunks > 0])
mean(con_sub$length)
mean(lib_sub$length)
mean(con_sub$interrupts)
mean(lib_sub$interrupts)

table(lib_spkr$gender.x)
table(con_spkr$gender.x)

# tone of interruptions within con_ints and lib_ints
mean(con_ints$type_positive)
mean(con_ints$type_negative)
mean(lib_ints$type_positive)
mean(lib_ints$type_negative)
t.test(type_positive ~ gender_judge, con_ints)
t.test(type_positive ~ gender_judge, lib_ints)
t.test(type_negative ~ gender_judge, con_ints)
t.test(type_negative ~ gender_judge, lib_ints)

# apology rates; na.rm = TRUE mirrors the overall apology mean computed in
# V-E, so missing apology codes do not turn the result into NA
mean(con_ints$apology, na.rm = TRUE)
mean(lib_ints$apology, na.rm = TRUE)
t.test(apology ~ gender_judge, con_ints)
t.test(apology ~ gender_judge, lib_ints)

####                                                            tables & figures
#tab.5
# gender comparisons of speaker_chunks (speakers with > 0 chunks), length and
# interrupts, run separately on the con_* and lib_* data frames
t.test(speaker_chunks ~ gender.x, data = con_spkr, subset = speaker_chunks > 0)
t.test(speaker_chunks ~ gender.x, data = lib_spkr, subset = speaker_chunks > 0)
t.test(length ~ gender_judge, data = con_sub)
t.test(length ~ gender_judge, data = lib_sub)
t.test(interrupts ~ gender_judge, data = con_sub)
t.test(interrupts ~ gender_judge, data = lib_sub)

#fig.10
# hard-coded ratios: ideology on the x axis, bars split by gender and
# interruption tone
for.fig10 <- data.frame(
  ideology = rep(rep(c("conservative", "liberal"), each = 2), times = 2),
  gender_type = c(
    rep(c("men-positive", "women-positive"), times = 2),
    rep(c("men-negative", "women-negative"), times = 2)
  ),
  ratio = c(0.3988, 0.4677, 0.3855, 0.4215, 0.5775, 0.5081, 0.5839, 0.5378)
)
# pattern / pattern_color are per-bar constants; the legend is patched to match
for.fig10 %>%
  mutate(
    gender_type = fct_relevel(
      gender_type,
      "men-negative", "women-negative", "men-positive", "women-positive"
    )
  ) %>%
  ggplot(
    aes(x = ideology, y = ratio, fill = gender_type, pattern = gender_type)
  ) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = rep(rep(c("none", "stripe"), each = 2), times = 2),
    pattern_color = rep(c("steelblue3", "palevioletred1"), times = 4),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = rep(c("steelblue4", "palevioletred3"), times = 2)) +
  labs(
    title = "Average Interruption Ratio",
    x = "ideology",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = rep(c("none", "stripe"), each = 2),
        pattern_color = rep(c("steelblue3", "palevioletred1"), times = 2)
      )
    )
  )








####                                   V-G                                  ####
####                                               regressions & interpretations
# one bivariate model per data frame c2..c11 and cdcc: interrupts on
# gender_judge
int2  <- lm(formula = interrupts ~ gender_judge, data = c2)
int3  <- lm(formula = interrupts ~ gender_judge, data = c3)
int4  <- lm(formula = interrupts ~ gender_judge, data = c4)
int5  <- lm(formula = interrupts ~ gender_judge, data = c5)
int6  <- lm(formula = interrupts ~ gender_judge, data = c6)
int7  <- lm(formula = interrupts ~ gender_judge, data = c7)
int8  <- lm(formula = interrupts ~ gender_judge, data = c8)
int9  <- lm(formula = interrupts ~ gender_judge, data = c9)
int10 <- lm(formula = interrupts ~ gender_judge, data = c10)
int11 <- lm(formula = interrupts ~ gender_judge, data = c11)
intDC <- lm(formula = interrupts ~ gender_judge, data = cdcc)

# positive tone on gender_judge, one model per data frame i2..i11 and idcc
pint2  <- lm(formula = type_positive ~ gender_judge, data = i2)
pint3  <- lm(formula = type_positive ~ gender_judge, data = i3)
pint4  <- lm(formula = type_positive ~ gender_judge, data = i4)
pint5  <- lm(formula = type_positive ~ gender_judge, data = i5)
pint6  <- lm(formula = type_positive ~ gender_judge, data = i6)
pint7  <- lm(formula = type_positive ~ gender_judge, data = i7)
pint8  <- lm(formula = type_positive ~ gender_judge, data = i8)
pint9  <- lm(formula = type_positive ~ gender_judge, data = i9)
pint10 <- lm(formula = type_positive ~ gender_judge, data = i10)
pint11 <- lm(formula = type_positive ~ gender_judge, data = i11)
pintDC <- lm(formula = type_positive ~ gender_judge, data = idcc)

# negative tone on gender_judge, same data frames
nint2  <- lm(formula = type_negative ~ gender_judge, data = i2)
nint3  <- lm(formula = type_negative ~ gender_judge, data = i3)
nint4  <- lm(formula = type_negative ~ gender_judge, data = i4)
nint5  <- lm(formula = type_negative ~ gender_judge, data = i5)
nint6  <- lm(formula = type_negative ~ gender_judge, data = i6)
nint7  <- lm(formula = type_negative ~ gender_judge, data = i7)
nint8  <- lm(formula = type_negative ~ gender_judge, data = i8)
nint9  <- lm(formula = type_negative ~ gender_judge, data = i9)
nint10 <- lm(formula = type_negative ~ gender_judge, data = i10)
nint11 <- lm(formula = type_negative ~ gender_judge, data = i11)
nintDC <- lm(formula = type_negative ~ gender_judge, data = idcc)

summary(pint2)

####                                                            tables & figures
#fig.11
# dot-and-whisker plot of the eleven propensity models; every color level is
# mapped to black and a dashed red reference line marks zero
dwplot(
  list(int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, intDC),
  dodge_size = 1.15
) +
  aes(color = estimate > 0) +
  geom_vline(xintercept = 0, colour = "red3", lty = 2, lwd = 0.75) +
  scale_color_manual(values = rep("black", times = 11)) +
  xlim(-0.37, 0.25) +
  theme_bw() +
  theme(legend.position = "none")

#fig.12
# dot-and-whisker plot of the positive (pint*) and negative (nint*) tone
# models, interleaved pairwise; shapes and colors alternate in the same order
# (16 / black, then 17 / darkgrey) across the 22 models
dwplot(
  list(
    pint2, nint2, pint3, nint3, pint4, nint4, pint5, nint5, pint6, nint6,
    pint7, nint7, pint8, nint8, pint9, nint9, pint10, nint10, pint11, nint11,
    pintDC, nintDC
  ),
  dodge_size = 1.15,
  dot_args = list(
    shape = rep(c(16, 17), times = 11),
    size = 2.5,
    color = rep(c("black", "darkgrey"), times = 11)
  ),
  whisker_args = list(color = rep(c("black", "darkgrey"), times = 11))
) +
  aes(color = estimate > 0) +
  geom_vline(xintercept = 0, colour = "red", lty = 2, lwd = 0.75) +
  xlim(-0.5, 0.5) +
  theme_bw()

####                                APPENDIX C                              ####
# tone_backchannel by interrupter gender (int_sub by gender_judge, int_sub2
# by gender_atty)
t.test(tone_backchannel ~ gender_judge, data = int_sub)
t.test(tone_backchannel ~ gender_atty, data = int_sub2)

# ratio_bc in subj0..subj3; trailing comments record the values noted
summary(subj0$ratio_bc)                                       # 0 men
t.test(ratio_bc ~ gender.x, data = subj1)                     # 0 men, 0 women
t.test(ratio_bc ~ gender.x, data = subj2)                     # 0 men, 0 women
summary(subj3$ratio_bc)                                       #        0 women

#fig.c1
# hard-coded values, one bar per speaker type and gender
for.figc1 <- data.frame(
  speaker = rep(c("judge", "attorney"), each = 2),
  type = rep(c("men", "women"), times = 2),
  length = c(0.0066, 0.0096, 0.0628, 0.0943)
)
ggplot(for.figc1, aes(x = speaker, y = length, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("steelblue4", "palevioletred3")) +
  labs(title = "", x = "interrupter", y = "% interruptions") +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  )

####                                APPENDIX D                              ####
# all_women adds num_wom_atty to num_women; the three ratios are interruptions
# per chunk and positive / negative interruptions per interruption
spkr_lvl$all_women <- spkr_lvl$num_wom_atty + spkr_lvl$num_women
spkr_lvl$ratio_ints <- spkr_lvl$speaker_ints / spkr_lvl$speaker_chunks
spkr_lvl$ratio_pos_ints <- spkr_lvl$type_positive / spkr_lvl$speaker_ints
spkr_lvl$ratio_neg_ints <- spkr_lvl$type_negative / spkr_lvl$speaker_ints

# mean ratio_ints per (gender, all_women) cell: gender == 0 with 0-5 women,
# then gender == 1 with 1-6 women; na.rm also discards the NaN from 0 / 0
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 0], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 6], na.rm = TRUE)

# same cells for the positive-interruption ratio
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 0], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_pos_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 6], na.rm = TRUE)

# same cells for the negative-interruption ratio
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 0], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 0 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 1], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 2], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 3], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 4], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 5], na.rm = TRUE)
mean(spkr_lvl$ratio_neg_ints[spkr_lvl$gender == 1 & spkr_lvl$all_women == 6], na.rm = TRUE)

#fig.d1
# hard-coded interruption rates by total number of women: men with 0-5,
# women with 1-6
for.figd1 <- data.frame(
  women = c("0", "1", "2", "3", "4", "5", "1", "2", "3", "4", "5", "6"),
  gender = rep(c("men", "women"), each = 6),
  ratio = c(
    0.2552, 0.2558, 0.2570, 0.2270, 0.2377, 0.1626,
    0.1664, 0.2536, 0.2429, 0.1723, 0.1670, 0.1406
  )
)
ggplot(for.figd1, aes(x = women, y = ratio, fill = gender)) +
  geom_bar(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single")
  ) +
  scale_fill_manual(values = c("steelblue4", "palevioletred3")) +
  labs(
    title = "Average Interruption Rate",
    x = "# total women",
    y = "interruption rate"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  )

#fig.d2
# hard-coded ratios by total number of women, drawn as striped bars
for.figd2 <- data.frame(
  women = c("0", "1", "2", "3", "4", "5", "1", "2", "3", "4", "5", "6"),
  gender = rep(c("men", "women"), each = 6),
  ratio = c(
    0.5058, 0.4245, 0.4348, 0.4630, 0.3182, 0.5000,
    0.4464, 0.4957, 0.4589, 0.5174, 0.3355, 0.8393
  )
)
# pattern_color is a per-bar constant; the legend is patched to match
ggplot(
  for.figd2,
  aes(x = women, y = ratio, fill = gender, pattern = gender)
) +
  geom_bar_pattern(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single"),
    pattern = "stripe",
    pattern_color = c(
      "steelblue3", "steelblue3", "palevioletred1", "steelblue3",
      "palevioletred1", "steelblue3", "palevioletred1", "steelblue3",
      "palevioletred1", "steelblue3", "palevioletred1", "palevioletred1"
    ),
    pattern_density = 0.05,
    pattern_spacing = 0.025
  ) +
  scale_fill_manual(values = c("steelblue4", "palevioletred3")) +
  labs(
    title = "Average Interruption Ratio",
    x = "# total women",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  ) +
  guides(
    fill = guide_legend(
      override.aes = list(
        pattern = "stripe",
        pattern_color = c("steelblue3", "palevioletred1")
      )
    )
  )

#fig.d3
# hard-coded ratios by total number of women, drawn as plain bars
for.figd3 <- data.frame(
  women = c("0", "1", "2", "3", "4", "5", "1", "2", "3", "4", "5", "6"),
  gender = rep(c("men", "women"), each = 6),
  ratio = c(
    0.4942, 0.5755, 0.5652, 0.5370, 0.6818, 0.5000,
    0.5536, 0.5043, 0.5411, 0.4826, 0.6645, 0.1607
  )
)
ggplot(for.figd3, aes(x = women, y = ratio, fill = gender)) +
  geom_bar(
    stat = "identity",
    position = position_dodge2(width = 0.95, preserve = "single")
  ) +
  scale_fill_manual(values = c("steelblue4", "palevioletred3")) +
  labs(
    title = "Average Interruption Rate",
    x = "# total women",
    y = "% interruptions"
  ) +
  theme_light() +
  theme(
    text = element_text(
      family = "Times New Roman", face = "bold", color = "black", size = 12
    )
  )

####                                APPENDIX E                              ####
# lookup with one row per speaker, built from columns 3 and 8 of df
# NOTE(review): the code below reads `speaker` and `gender` from this table,
# so columns 3 and 8 are presumably those two -- confirm against the CSV
cideol <- df[, c(3, 8)]
cideol <- cideol[!duplicated(cideol$speaker), ]

#tab.e1
# per court: mean ideol per speaker, the mean across those speakers, then the
# same mean split by the `gender` value merged in from cideol
ctagg2 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Second Cir.")
mean(ctagg2$ideol)
ctagg2 <- merge(x = ctagg2, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg2$ideol[ctagg2$gender == 0], na.rm = TRUE)
mean(ctagg2$ideol[ctagg2$gender == 1], na.rm = TRUE)

ctagg3 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Third Cir.")
mean(ctagg3$ideol)
ctagg3 <- merge(x = ctagg3, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg3$ideol[ctagg3$gender == 0], na.rm = TRUE)
mean(ctagg3$ideol[ctagg3$gender == 1], na.rm = TRUE)

ctagg4 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Fourth Cir.")
mean(ctagg4$ideol)
ctagg4 <- merge(x = ctagg4, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg4$ideol[ctagg4$gender == 0], na.rm = TRUE)
mean(ctagg4$ideol[ctagg4$gender == 1], na.rm = TRUE)

ctagg5 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Fifth Cir.")
mean(ctagg5$ideol)
ctagg5 <- merge(x = ctagg5, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg5$ideol[ctagg5$gender == 0], na.rm = TRUE)
mean(ctagg5$ideol[ctagg5$gender == 1], na.rm = TRUE)

ctagg6 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Sixth Cir.")
mean(ctagg6$ideol)
ctagg6 <- merge(x = ctagg6, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg6$ideol[ctagg6$gender == 0], na.rm = TRUE)
mean(ctagg6$ideol[ctagg6$gender == 1], na.rm = TRUE)

ctagg7 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Seventh Cir.")
mean(ctagg7$ideol)
ctagg7 <- merge(x = ctagg7, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg7$ideol[ctagg7$gender == 0], na.rm = TRUE)
mean(ctagg7$ideol[ctagg7$gender == 1], na.rm = TRUE)

ctagg8 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Eighth Cir.")
mean(ctagg8$ideol)
ctagg8 <- merge(x = ctagg8, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg8$ideol[ctagg8$gender == 0], na.rm = TRUE)
mean(ctagg8$ideol[ctagg8$gender == 1], na.rm = TRUE)

ctagg9 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                    subset = court == "Ninth Cir.")
mean(ctagg9$ideol)
ctagg9 <- merge(x = ctagg9, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg9$ideol[ctagg9$gender == 0], na.rm = TRUE)
mean(ctagg9$ideol[ctagg9$gender == 1], na.rm = TRUE)

ctagg10 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                     subset = court == "Tenth Cir.")
mean(ctagg10$ideol)
ctagg10 <- merge(x = ctagg10, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg10$ideol[ctagg10$gender == 0], na.rm = TRUE)
mean(ctagg10$ideol[ctagg10$gender == 1], na.rm = TRUE)

ctagg11 <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                     subset = court == "Eleventh Cir.")
mean(ctagg11$ideol)
ctagg11 <- merge(x = ctagg11, y = cideol, by = "speaker", all.x = TRUE)
mean(ctagg11$ideol[ctagg11$gender == 0], na.rm = TRUE)
mean(ctagg11$ideol[ctagg11$gender == 1], na.rm = TRUE)

ctaggdc <- aggregate(ideol ~ speaker, data = df, FUN = mean,
                     subset = court == "D.C. Cir.")
mean(ctaggdc$ideol)
ctaggdc <- merge(x = ctaggdc, y = cideol, by = "speaker", all.x = TRUE)
mean(ctaggdc$ideol[ctaggdc$gender == 0], na.rm = TRUE)
mean(ctaggdc$ideol[ctaggdc$gender == 1], na.rm = TRUE)